# install.packages("ggplot2", dependencies = TRUE)
# install.packages("data.table", dependencies = TRUE)
# install.packages("tidyr", dependencies = TRUE)
# install.packages("reshape2", dependencies = TRUE)
# install.packages("dplyr", dependencies = TRUE)
# install.packages("plotly", dependencies = TRUE)
# install.packages("shiny", dependencies = TRUE)
# install.packages("shinythemes", dependencies = TRUE)
# install.packages("DT", dependencies = TRUE)
library(stringr)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
library(data.table)
## Warning: package 'data.table' was built under R version 4.3.3
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.3.3
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.3.3
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
## The following objects are masked from 'package:data.table':
##
## dcast, melt
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plotly)
## Warning: package 'plotly' was built under R version 4.3.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(shiny)
## Warning: package 'shiny' was built under R version 4.3.3
library(shinythemes)
## Warning: package 'shinythemes' was built under R version 4.3.3
library(DT)
## Warning: package 'DT' was built under R version 4.3.3
##
## Attaching package: 'DT'
## The following objects are masked from 'package:shiny':
##
## dataTableOutput, renderDataTable
# ---- Data loading and preparation ----

# Raw data set exactly as read from the CSV file.
df_sleep <- read.csv(
  "Sleep_health_and_lifestyle_dataset.csv",
  sep = ",",
  header = TRUE
)

# Working copy without the row identifier column.
df_sleep2 <- df_sleep %>%
  select(-Person.ID)

# Convert the categorical columns to factors. Explicit levels fix the order in
# which the categories appear in tables and plots.
df_sleep2$Gender <- as.factor(df_sleep2$Gender)
df_sleep2$Occupation <- as.factor(df_sleep2$Occupation)
df_sleep2$BMI.Category <- factor(
  df_sleep2$BMI.Category,
  levels = c("Normal", "Normal Weight", "Overweight", "Obese")
)
# The converted column has to be stored on df_sleep2. Writing it to the raw
# df_sleep left Sleep.Disorder as plain character in every derived data frame
# and silently modified the raw data instead.
df_sleep2$Sleep.Disorder <- factor(
  df_sleep2$Sleep.Disorder,
  levels = c("None", "Insomnia", "Sleep Apnea")
)

# "Normal Weight" and "Normal" are two spellings of one category; merge them.
df_sleep2 <- df_sleep2 %>%
  mutate(BMI.Category = recode(BMI.Category, "Normal Weight" = "Normal"))

# Split Blood.Pressure on "/" into two integer columns.
df_sleep_new <- df_sleep2 %>%
  separate_wider_delim(
    Blood.Pressure,
    delim = "/",
    names = c("Systolic.Pressure", "Diastolic.Pressure")
  ) %>%
  mutate(
    Systolic.Pressure = as.integer(Systolic.Pressure),
    Diastolic.Pressure = as.integer(Diastolic.Pressure)
  )

# Classify each reading. The rules are tried top to bottom:
#   Hypotension  - systolic below 90 or diastolic below 60
#   Normal       - systolic 90-119 and diastolic 60-79
#   Hypertension - systolic 120 or more, or diastolic 80 or more
# The last rule is written out explicitly (rather than "> 90 | > 60") so that a
# row with one missing reading stays NA instead of being labelled hypertensive.
df_sleep_new <- df_sleep_new %>%
  mutate(
    Tension.Category = case_when(
      Systolic.Pressure < 90 | Diastolic.Pressure < 60 ~ "Hypotension",
      Systolic.Pressure >= 90 & Systolic.Pressure <= 119 &
        Diastolic.Pressure >= 60 & Diastolic.Pressure <= 79 ~ "Normal",
      Systolic.Pressure >= 120 | Diastolic.Pressure >= 80 ~ "Hypertension"
    )
  )
df_sleep_new$Tension.Category <- as.factor(df_sleep_new$Tension.Category)

# Remove exact duplicate rows (they only differed by Person.ID before).
df_sleep_clean <- df_sleep_new %>%
  distinct()
# ---- Outlier check: one box plot per measure, grouped by Sleep.Disorder ----
# Each chart is stored (the Shiny server renders chart_1 ... chart_6 later) and
# also shown immediately as an interactive plotly widget.

# Sleep duration
chart_1 <- ggplot(
  data = df_sleep_clean,
  mapping = aes(x = Sleep.Disorder, y = Sleep.Duration)
) +
  geom_boxplot() +
  labs(x = "Sleep Disorder", y = "Sleep Duration") +
  theme_minimal()
ggplotly(chart_1)

# Daily steps
chart_2 <- ggplot(
  data = df_sleep_clean,
  mapping = aes(x = Sleep.Disorder, y = Daily.Steps)
) +
  geom_boxplot() +
  labs(x = "Sleep Disorder", y = "Daily Steps") +
  theme_minimal()
ggplotly(chart_2)

# Heart rate
chart_3 <- ggplot(
  data = df_sleep_clean,
  mapping = aes(x = Sleep.Disorder, y = Heart.Rate)
) +
  geom_boxplot() +
  labs(x = "Sleep Disorder", y = "Heart Rate") +
  theme_minimal()
ggplotly(chart_3)

# Physical activity level
chart_4 <- ggplot(
  data = df_sleep_clean,
  mapping = aes(x = Sleep.Disorder, y = Physical.Activity.Level)
) +
  geom_boxplot() +
  labs(x = "Sleep Disorder", y = "Physical Activity Level") +
  theme_minimal()
ggplotly(chart_4)

# Stress level
chart_5 <- ggplot(
  data = df_sleep_clean,
  mapping = aes(x = Sleep.Disorder, y = Stress.Level)
) +
  geom_boxplot() +
  labs(x = "Sleep Disorder", y = "Stress Level") +
  theme_minimal()
ggplotly(chart_5)

# Quality of sleep
chart_6 <- ggplot(
  data = df_sleep_clean,
  mapping = aes(x = Sleep.Disorder, y = Quality.of.Sleep)
) +
  geom_boxplot() +
  labs(x = "Sleep Disorder", y = "Quality of Sleep") +
  theme_minimal()
ggplotly(chart_6)
# Insight: The box plots above show some outliers in the data (we grouped the data by Sleep Disorder because we consider it the most suitable factor to gain insight from).
# Because we want to preserve the original data, we do not handle the outliers.
# Print a per-column statistical summary of the cleaned data set.
summary(df_sleep_clean)
## Gender Age Occupation Sleep.Duration Quality.of.Sleep
## Female:65 Min. :27.00 Nurse :29 Min. :5.800 Min. :4.000
## Male :67 1st Qu.:33.75 Doctor :24 1st Qu.:6.400 1st Qu.:6.000
## Median :41.00 Engineer :22 Median :7.150 Median :7.000
## Mean :41.13 Lawyer :15 Mean :7.083 Mean :7.152
## 3rd Qu.:49.00 Teacher :15 3rd Qu.:7.725 3rd Qu.:8.000
## Max. :59.00 Accountant:11 Max. :8.500 Max. :9.000
## (Other) :16
## Physical.Activity.Level Stress.Level BMI.Category Systolic.Pressure
## Min. :30.00 Min. :3.000 Normal :73 Min. :115.0
## 1st Qu.:44.25 1st Qu.:4.000 Overweight:52 1st Qu.:120.8
## Median :60.00 Median :6.000 Obese : 7 Median :130.0
## Mean :58.39 Mean :5.538 Mean :128.4
## 3rd Qu.:75.00 3rd Qu.:7.000 3rd Qu.:135.0
## Max. :90.00 Max. :8.000 Max. :142.0
##
## Diastolic.Pressure Heart.Rate Daily.Steps Sleep.Disorder
## Min. :75.00 Min. :65.0 Min. : 3000 Length:132
## 1st Qu.:80.00 1st Qu.:68.0 1st Qu.: 5000 Class :character
## Median :85.00 Median :70.0 Median : 7000 Mode :character
## Mean :84.54 Mean :71.2 Mean : 6638
## 3rd Qu.:88.50 3rd Qu.:74.0 3rd Qu.: 8000
## Max. :95.00 Max. :86.0 Max. :10000
##
## Tension.Category
## Hypertension:117
## Normal : 15
##
##
##
##
##
# Visualization 1: share of each sleep disorder within every occupation.
# The cross-tabulation yields columns Var1 (occupation), Var2 (sleep disorder)
# and Freq (count); Percentage is only used for the hover text, the bar heights
# are normalised by position = "fill".
occupation_disorder_share <- as.data.frame(
  table(df_sleep_clean$Occupation, df_sleep_clean$Sleep.Disorder)
) %>%
  group_by(Var1) %>%
  mutate(
    Total = sum(Freq),
    Percentage = Freq * 100 / Total
  ) %>%
  ungroup()

vis_1 <- ggplot(
  data = occupation_disorder_share,
  mapping = aes(
    x = Var1,
    y = Freq,
    fill = Var2,
    # "text" is not a ggplot2 aesthetic; ggplotly() reads it for the tooltip.
    text = paste("Occupation :", Var1,
                 "<br>Sleep Disorder :", Var2,
                 "<br>Percentage :", round(Percentage, 1), "%")
  )
) +
  geom_bar(stat = "identity", position = "fill") +
  scale_y_continuous(labels = scales::percent_format()) +
  scale_fill_manual(values = c("#9BDF96", "#5D9D57", "#3C7237")) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 0, vjust = 0.5, hjust = 0.5),
    plot.margin = margin(t = 20)
  ) +
  labs(
    title = "Percentage of Sleep Disorder per Occupation",
    x = "Occupation",
    y = "Percentage",
    fill = "Sleep Disorder"
  ) +
  # Wrap long occupation names so the axis labels do not overlap.
  scale_x_discrete(labels = function(x) str_wrap(x, width = 10))
ggplotly(vis_1, tooltip = "text")
# Insight: Sales Representatives have the highest percentage of Sleep Apnea, while Salespersons have the highest percentage of Insomnia.
# Visualization 2: share of each BMI category within every occupation.
# Var1 is the occupation, Var2 the BMI category and Freq the row count;
# Percentage feeds the tooltip only, bar heights come from position = "fill".
occupation_bmi_share <- as.data.frame(
  table(df_sleep_clean$Occupation, df_sleep_clean$BMI.Category)
) %>%
  group_by(Var1) %>%
  mutate(
    Total = sum(Freq),
    Percentage = Freq * 100 / Total
  ) %>%
  ungroup()

vis_2 <- ggplot(
  data = occupation_bmi_share,
  mapping = aes(
    x = Var1,
    y = Freq,
    fill = Var2,
    # "text" is not a ggplot2 aesthetic; ggplotly() reads it for the tooltip.
    text = paste("Occupation :", Var1,
                 "<br>BMI Category :", Var2,
                 "<br>Percentage :", round(Percentage, 1), "%")
  )
) +
  geom_bar(stat = "identity", position = "fill") +
  scale_y_continuous(labels = scales::percent_format()) +
  scale_fill_manual(values = c("#9BDF96", "#5D9D57", "#3C7237")) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 0, vjust = 0.5, hjust = 0.5),
    plot.margin = margin(t = 20)
  ) +
  labs(
    title = "Percentage of BMI Category per Occupation",
    x = "Occupation",
    y = "Percentage",
    fill = "BMI Category"
  ) +
  # Wrap long occupation names so the axis labels do not overlap.
  scale_x_discrete(labels = function(x) str_wrap(x, width = 10))
ggplotly(vis_2, tooltip = "text")
# Insight: Sales Representatives have the highest percentage of Obese, while Salespersons, Managers, and Scientists have the highest percentage of Overweight.
# Visualization 3: share of each sleep disorder within every BMI category.
# Var1 is the BMI category, Var2 the sleep disorder and Freq the row count;
# Percentage feeds the tooltip only, bar heights come from position = "fill".
bmi_disorder_share <- as.data.frame(
  table(df_sleep_clean$BMI.Category, df_sleep_clean$Sleep.Disorder)
) %>%
  group_by(Var1) %>%
  mutate(
    Total = sum(Freq),
    Percentage = Freq * 100 / Total
  ) %>%
  ungroup()

vis_3 <- ggplot(
  data = bmi_disorder_share,
  mapping = aes(
    x = Var1,
    y = Freq,
    fill = Var2,
    # "text" is not a ggplot2 aesthetic; ggplotly() reads it for the tooltip.
    text = paste("BMI Category :", Var1,
                 "<br>Sleep Disorder :", Var2,
                 "<br>Percentage :", round(Percentage, 1), "%")
  )
) +
  geom_bar(stat = "identity", position = "fill") +
  scale_y_continuous(labels = scales::percent_format()) +
  scale_fill_manual(values = c("#9BDF96", "#5D9D57", "#3C7237")) +
  theme_minimal() +
  labs(
    title = "Percentage of Sleep Disorder per BMI Category",
    x = "BMI Category",
    y = "Percentage",
    fill = "Sleep Disorder"
  )
ggplotly(vis_3, tooltip = "text")
# Insight: People with normal weight are more likely to have no sleep disorder than people in the other BMI categories, and not a single obese person is free of sleep disorders.
# Visualization 4: distribution of sleep quality per BMI category, drawn as
# horizontal box plots (coord_flip swaps the axes after the stats are computed).
vis_4 <- ggplot(
  data = df_sleep_clean,
  mapping = aes(
    x = BMI.Category,
    y = Quality.of.Sleep,
    fill = BMI.Category
  )
) +
  geom_boxplot(width = 5) +
  coord_flip() +
  scale_fill_manual(values = c("#9BDF96", "#5D9D57", "#3C7237")) +
  theme_minimal() +
  labs(
    title = "Quality of Sleep based on BMI Category",
    x = "BMI Category",
    y = "Quality of Sleep",
    fill = "BMI Category"
  ) +
  # Force both axes to include 0.
  expand_limits(x = 0, y = 0)
ggplotly(vis_4)
# Insight: People with normal weight have the best quality of sleep. People with the worst sleep quality are more likely to be in a higher BMI category.
# Visualization 5: distribution of sleep duration per BMI category, drawn as
# horizontal box plots (coord_flip swaps the axes after the stats are computed).
vis_5 <- ggplot(
  data = df_sleep_clean,
  mapping = aes(
    x = BMI.Category,
    y = Sleep.Duration,
    fill = BMI.Category
  )
) +
  geom_boxplot(width = 5) +
  coord_flip() +
  scale_fill_manual(values = c("#9BDF96", "#5D9D57", "#3C7237")) +
  theme_minimal() +
  labs(
    title = "Sleep Duration based on BMI Category",
    x = "BMI Category",
    y = "Sleep Duration",
    fill = "BMI Category"
  ) +
  # Force both axes to include 0.
  expand_limits(x = 0, y = 0)
ggplotly(vis_5)
# Insight: People with normal weight have the healthiest sleep duration. People with shorter sleep duration are more likely to be in a higher BMI category.
# Visualization 6: age distribution per sleep disorder, drawn as horizontal
# box plots (coord_flip swaps the axes after the stats are computed).
vis_6 <- ggplot(
  data = df_sleep_clean,
  mapping = aes(
    x = Sleep.Disorder,
    y = Age,
    fill = Sleep.Disorder
  )
) +
  geom_boxplot(width = 5) +
  coord_flip() +
  scale_fill_manual(values = c("#9BDF96", "#5D9D57", "#3C7237")) +
  theme_minimal() +
  labs(
    title = "Age Distribution based on Sleep Disorder",
    x = "Sleep Disorder",
    y = "Age",
    fill = "Sleep Disorder"
  ) +
  # Force both axes to include 0.
  expand_limits(x = 0, y = 0)
ggplotly(vis_6)
# Insight: Older people are more likely to have Sleep Apnea than younger people.
# Correlation heatmap of all numeric columns.
# cor() returns a square matrix; reshape2::melt() turns it into long form with
# columns Var1, Var2 (variable names) and value (correlation coefficient).
# melt() is namespace-qualified because data.table exports a melt() as well and
# which one wins would otherwise depend on the order of the library() calls.
heatmap <- df_sleep_clean %>%
  select(where(is.numeric)) %>% # replaces the superseded select_if()
  cor() %>%
  reshape2::melt() %>%
  ggplot(aes(
    x = Var1,
    y = Var2,
    fill = value,
    label = round(value, 2),
    # "text" is not a ggplot2 aesthetic; ggplotly() reads it for the tooltip.
    text = paste("Variable 1 :", Var1,
                 "<br>Variable 2 :", Var2,
                 "<br>Correlation Coefficient :", round(value, 2))
  )) +
  geom_tile(color = "white") +
  geom_text(color = "black") +
  scale_fill_gradient2(
    low = "#FF751A",
    mid = "white",
    high = "#3C7237",
    midpoint = 0,
    limits = c(-1, 1), # spelled out; "limit" only worked via partial matching
    space = "Lab",
    name = "Correlation"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 0, vjust = 0.5, hjust = 0.5),
    axis.text.y = element_text(hjust = 1)
  ) +
  labs(
    title = "Correlation Heatmap",
    x = "",
    y = ""
  ) +
  # Show column names with spaces instead of dots and wrap them to 10 characters.
  scale_x_discrete(labels = function(x) str_wrap(str_replace_all(x, "\\.", " "), width = 10)) +
  scale_y_discrete(labels = function(x) str_wrap(str_replace_all(x, "\\.", " "), width = 10))
ggplotly(heatmap, tooltip = "text")
# Insight:
# Positively correlated variables:
#   - Sleep Duration and Quality of Sleep
#   - Systolic and Diastolic Pressure
#   - Physical Activity Level and Daily Steps
# Negatively correlated variables:
#   - Stress Level and Quality of Sleep
#   - Stress Level and Sleep Duration
#   - Quality of Sleep and Heart Rate
# ---- Shiny user interface ----

# Numeric columns offered by the regression and hypothesis-test selectors.
# vapply() guarantees a logical vector whatever the data frame contains.
numeric_columns <- names(df_sleep_clean)[
  vapply(df_sleep_clean, is.numeric, logical(1))
]
# Pre-select a different column in the second selector of each pair: a model of
# a variable against itself is meaningless.
second_numeric_column <- numeric_columns[min(2, length(numeric_columns))]

# Page layout: a navigation bar with one tab per report chapter. Every
# "codeNN" output shows a code listing, the output next to it shows its result.
# HTML line breaks are written as <br>; "</br>" is not a valid tag.
ui <- fluidPage(
  # Website Theme
  theme = shinytheme("cosmo"),
  title = "Assurance of Learning Data Mining and Visualization Group 3",
  # Page Title
  titlePanel(tags$b("Assurance of Learning Data Mining and Visualization Group 3")),
  # Navigation Bar
  navbarPage(
    "Navigation",
    # Display Group
    tabPanel(
      "Group Members",
      HTML("<h4><b>Class : LG09</b></h4>
<h4><b>Group : 3</b></h4>
<h5><b>1. Clarissa Beatrice Kosasih / 2702209350</b></h5>
<h5><b>2. Marcelline Cathrine Wilison / 2702210604 </b></h5>
<h5><b>3. Miecel Alicia Angel J / 2702327601</b></h5>
<h5><b>4. William / 2702225373</b></h5>
<h5><b>5. William Darma Wijaya / 2702218645</b></h5>")
    ),
    # Display Raw Dataset
    tabPanel(
      "Raw Dataset",
      HTML("<h3><b>Sleep Health and Lifestyle Dataset</b></h3>"),
      HTML("<h4><b>Code</b></h4>"),
      verbatimTextOutput("code1"),
      HTML("<h4><b>Output</b></h4>"),
      DTOutput("dataset")
    ),
    # Data Preparation
    tabPanel(
      "Data Preparation",
      tabsetPanel(
        # Assess General Characteristics
        tabPanel(
          tags$b("Assess General Characteristics"),
          HTML("<h3><b>Dataset Structures</b></h3>"),
          HTML("<h4><b>Code</b></h4>"),
          verbatimTextOutput("code2"),
          HTML("<h4><b>Output</b></h4>"),
          verbatimTextOutput("str"),
          HTML("<h3><b>Remove Person.ID Column</b></h3>"),
          HTML("<h4><b>Code</b></h4>"),
          verbatimTextOutput("code3"),
          HTML("<h3><b>Change Categorical Column to Factor Data Type</b></h3>"),
          HTML("<h4><b>Code</b></h4>"),
          verbatimTextOutput("code4"),
          HTML("<h3><b>Handle Inconsistency Data</b></h3>"),
          HTML("BMI.Category column has inconsistency in 'Normal' and 'Normal Weight' Data."),
          HTML("<h4><b>Code</b></h4>"),
          verbatimTextOutput("code5"),
          HTML("<h4><b>Dataset Structure</b></h4>"),
          verbatimTextOutput("str2"),
          HTML("<h4><b>Dataset</b></h4>"),
          DTOutput("dataset2")
        ),
        # Feature Engineering
        tabPanel(
          tags$b("Feature Engineering"),
          HTML("<h3><b>Separate Systolic and Diastolic Pressure</b></h3>"),
          HTML("<h4><b>Code</b></h4>"),
          verbatimTextOutput("code6"),
          HTML("<h3><b>Create Tension Categories</b></h3>"),
          HTML("<h4><b>Code</b></h4>"),
          verbatimTextOutput("code16"),
          HTML("<h3><b>Dataset</b></h3>"),
          DTOutput("dataset3")
        ),
        # Data Cleaning
        tabPanel(
          tags$b("Data Cleaning"),
          HTML("<h3><b>Check Missing Values</b></h3>"),
          HTML("<h4><b>Code</b></h4>"),
          verbatimTextOutput("code7"),
          HTML("<h4><b>Output</b></h4>"),
          verbatimTextOutput("checkmiss"),
          HTML("<b>Insight :
<br>There are no missing values in the dataset.</b>"),
          HTML("<h3><b>Remove Duplicates</b></h3>"),
          HTML("<h4><b>Code</b></h4>"),
          verbatimTextOutput("code8"),
          HTML("<h3><b>Get Statistical Summary</b></h3>"),
          HTML("<h4><b>Code</b></h4>"),
          verbatimTextOutput("code9"),
          HTML("<h4><b>Output</b></h4>"),
          verbatimTextOutput("statsum"),
          HTML("<h3><b>Check Outliers</b></h3>"),
          HTML("<h4><b>Sleep Duration<br>Code</b></h4>"),
          verbatimTextOutput("code10"),
          HTML("<h4><b>Output</b></h4>"),
          plotlyOutput("plot1", height = "500px", width = "700px"),
          HTML("<h4><b>Daily Steps<br>Code</b></h4>"),
          verbatimTextOutput("code11"),
          HTML("<h4><b>Output</b></h4>"),
          plotlyOutput("plot2", height = "500px", width = "700px"),
          HTML("<h4><b>Heart Rate<br>Code</b></h4>"),
          verbatimTextOutput("code12"),
          HTML("<h4><b>Output</b></h4>"),
          plotlyOutput("plot3", height = "500px", width = "700px"),
          HTML("<h4><b>Physical Activity Level<br>Code</b></h4>"),
          verbatimTextOutput("code13"),
          HTML("<h4><b>Output</b></h4>"),
          plotlyOutput("plot4", height = "500px", width = "700px"),
          HTML("<h4><b>Stress Level<br>Code</b></h4>"),
          verbatimTextOutput("code14"),
          HTML("<h4><b>Output</b></h4>"),
          plotlyOutput("plot5", height = "500px", width = "700px"),
          HTML("<h4><b>Quality of Sleep<br>Code</b></h4>"),
          verbatimTextOutput("code15"),
          HTML("<h4><b>Output</b></h4>"),
          plotlyOutput("plot6", height = "500px", width = "700px"),
          HTML("<b>Insight :
<br>There are some outliers in the data based on the chart we displayed.
<br>We group the data based on the Sleep Disorder because we consider it as the most compatible factor to gain insight from.
<br>Since we are aiming to keep the originality of the data we are using, we won't be handling the outliers.</b><br><br><br><br><br>")
        )
      )
    ),
    # Data Visualization
    tabPanel(
      "Data Visualization",
      tabsetPanel(
        # Composition Graphs
        tabPanel(
          tags$b("Composition Graphs"),
          # Visualization 1
          HTML("<h3><b>Sleep Disorder Composition based on Occupation</b></h3>"),
          HTML("<h4><b>Code</b></h4>"),
          verbatimTextOutput("code17"),
          HTML("<h4><b>Output</b></h4>"),
          plotlyOutput("vis1", height = "700px", width = "980px"),
          HTML("<b>Insight :
<br>Sales Representative has the highest percentage of Sleep Apnea, while Salesperson has the highest percentage of Insomnia.</b>"),
          # Visualization 2
          HTML("<h3><b>BMI Category Composition based on Occupation</b></h3>"),
          HTML("<h4><b>Code</b></h4>"),
          verbatimTextOutput("code18"),
          HTML("<h4><b>Output</b></h4>"),
          plotlyOutput("vis2", height = "700px", width = "980px"),
          HTML("<b>Insight :
<br>Sales Representative has the highest percentage of Obese, while Salesperson, Manager, and Scientist has the highest percentage of Overweight.</b>"),
          # Visualization 3
          HTML("<h3><b>Sleep Disorder Composition based on BMI Category</b></h3>"),
          HTML("<h4><b>Code</b></h4>"),
          verbatimTextOutput("code19"),
          HTML("<h4><b>Output</b></h4>"),
          plotlyOutput("vis3", height = "700px", width = "980px"),
          HTML("<b>Insight :
<br>Normal weighted people are more likely to have no sleep disorder than the other BMI categories, and there are no single person with obese that has no sleep disorder.</b><br><br><br>")
        ),
        # Distribution Graphs
        tabPanel(
          tags$b("Distribution Graphs"),
          # Visualization 4
          HTML("<h3><b>Sleep Quality based on BMI Category</b></h3>"),
          HTML("<h4><b>Code</b></h4>"),
          verbatimTextOutput("code20"),
          HTML("<h4><b>Output</b></h4>"),
          plotlyOutput("vis4", height = "500px", width = "980px"),
          HTML("<b>Insight :
<br>Normal weighted people have the best quality of sleep.
<br>People with the worst sleep quality are more likely to have higher BMI score/category.</b>"),
          # Visualization 5
          HTML("<h3><b>Sleep Duration based on BMI Category</b></h3>"),
          HTML("<h4><b>Code</b></h4>"),
          verbatimTextOutput("code21"),
          HTML("<h4><b>Output</b></h4>"),
          plotlyOutput("vis5", height = "500px", width = "980px"),
          HTML("<b>Insight :
<br>Normal weighted people has the healthiest duration of sleep.
<br>People with shorter sleep duration are more likely to have higher BMI score/category.</b>"),
          # Visualization 6
          HTML("<h3><b>Age Distribution based on Sleep Disorder</b></h3>"),
          HTML("<h4><b>Code</b></h4>"),
          verbatimTextOutput("code22"),
          HTML("<h4><b>Output</b></h4>"),
          plotlyOutput("vis6", height = "500px", width = "980px"),
          HTML("<b>Insight :
<br>Older people are more likely to get Sleep Apnea than young people.</b><br><br><br>")
        )
      )
    ),
    # Correlation Analysis
    tabPanel(
      "Correlation Analysis",
      HTML("<h3><b>Sleep Health and Lifestyle Heatmap</b></h3>"),
      HTML("<h4><b>Code</b></h4>"),
      verbatimTextOutput("code24"),
      HTML("<h4><b>Output</b></h4>"),
      plotlyOutput("heatmap", height = "700px", width = "980px"),
      HTML("<b>Insight :
<br>Positively correlated variables :
<br>- Sleep Duration and Quality of Sleep
<br>- Systolic and Diastolic Pressure
<br>- Physical Activity Level and Daily Steps
<br>Negatively correlated variables :
<br>- Stress Level and Quality of Sleep
<br>- Stress Level and Sleep Duration
<br>- Quality of Sleep and Heart Rate</b><br><br><br>")
    ),
    # Linear Regression
    tabPanel(
      "Linear Regression",
      # sidebarPanel()/mainPanel() are meant to be children of sidebarLayout(),
      # which supplies the enclosing grid row.
      sidebarLayout(
        sidebarPanel(
          selectInput("variable1", label = "X Axis:",
                      choices = numeric_columns),
          selectInput("variable2", label = "Y Axis:",
                      choices = numeric_columns,
                      selected = second_numeric_column),
          actionButton("submitbutton", "Submit", class = "btn btn-primary")
        ),
        mainPanel(
          plotlyOutput("visualization"),
          verbatimTextOutput("summary")
        )
      )
    ),
    # Hypothesis Testing
    tabPanel(
      "Hypothesis Testing",
      sidebarLayout(
        sidebarPanel(
          selectInput("variable1h", label = "Dependent Variable :",
                      choices = numeric_columns),
          selectInput("variable2h", label = "Independent Variable :",
                      choices = numeric_columns,
                      selected = second_numeric_column),
          actionButton("submitbutton2", "Submit", class = "btn btn-primary")
        ),
        mainPanel(
          HTML("<h4><b>P-Value</b></h4>"),
          verbatimTextOutput("htest")
        )
      )
    )
  )
)
# ---- Shiny server ----
# Fills every output declared in `ui`.
#
# * "codeNN" outputs print a fixed code listing; the listings are literal text
#   and are not evaluated.
# * Table, structure and plot outputs render objects that were built by the
#   script before the app starts (df_sleep, chart_1, vis_1, heatmap, ...).
# * The two interactive tabs (linear regression, hypothesis test) are
#   recomputed only when their Submit button is pressed.
#
# @param input  Shiny input list (selectors and buttons of the two model tabs).
# @param output Shiny output list to populate.
server <- function(input, output) {
  # Build a renderer that prints a fixed code listing verbatim.
  show_code <- function(code) {
    force(code)
    renderPrint({
      cat(code)
    })
  }

  # ---- Raw dataset ----
  output$code1 <- show_code('df_sleep <- read.csv("Sleep_health_and_lifestyle_dataset.csv", sep=",", header = TRUE)\ndf_sleep')
  output$dataset <- renderDT({
    df_sleep
  })

  # ---- Data preparation: general characteristics ----
  output$code2 <- show_code('str(df_sleep)')
  output$str <- renderPrint({
    str(df_sleep)
  })
  output$code3 <- show_code('df_sleep2 <- df_sleep %>%
select(-c(Person.ID))')
  output$code4 <- show_code('df_sleep2$Gender <- df_sleep2$Gender %>%
as.factor(.)
df_sleep2$Occupation <- df_sleep2$Occupation %>%
as.factor(.)
df_sleep2$BMI.Category <- df_sleep2$BMI.Category %>%
factor(., levels = c("Normal", "Normal Weight", "Overweight", "Obese"))
df_sleep$Sleep.Disorder <- df_sleep2$Sleep.Disorder %>%
factor(., levels = c("None", "Insomnia", "Sleep Apnea"))')
  output$code5 <- show_code('df_sleep2 <- df_sleep2 %>%
mutate(BMI.Category = recode(BMI.Category, "Normal Weight" = "Normal"))')
  output$str2 <- renderPrint({
    str(df_sleep2)
  })
  output$dataset2 <- renderDT({
    df_sleep2
  })

  # ---- Data preparation: feature engineering ----
  output$code6 <- show_code('df_sleep_new <- df_sleep2 %>%
separate_wider_delim(Blood.Pressure, delim = "/", names = c("Systolic.Pressure", "Diastolic.Pressure"))
df_sleep_new$Systolic.Pressure <- df_sleep_new$Systolic.Pressure %>%
as.integer(.)
df_sleep_new$Diastolic.Pressure <- df_sleep_new$Diastolic.Pressure %>%
as.integer(.)')
  output$code16 <- show_code('df_sleep_new <- df_sleep_new %>%
mutate(
Tension.Category = case_when(
Systolic.Pressure < 90 | Diastolic.Pressure < 60 ~ "Hypotension",
Systolic.Pressure >= 90 & Systolic.Pressure <= 119 & Diastolic.Pressure >= 60 & Diastolic.Pressure <= 79 ~ "Normal",
Systolic.Pressure > 90 | Diastolic.Pressure > 60 ~ "Hypertension"
)
)
df_sleep_new$Tension.Category <- df_sleep_new$Tension.Category %>%
as.factor(.)')
  output$dataset3 <- renderDT({
    df_sleep_new
  })

  # ---- Data preparation: cleaning ----
  output$code7 <- show_code('sum(is.na(df_sleep_new))')
  output$checkmiss <- renderPrint({
    sum(is.na(df_sleep_new))
  })
  output$code8 <- show_code('df_sleep_clean <- df_sleep_new %>%
distinct(.)')
  output$code9 <- show_code('df_sleep_clean %>%
summary(.)')
  output$statsum <- renderPrint({
    summary(df_sleep_clean)
  })

  # ---- Outlier box plots ----
  output$code10 <- show_code('chart_1 <- df_sleep_clean %>%
ggplot(mapping = aes(x = Sleep.Disorder,
y = Sleep.Duration)) +
geom_boxplot() +
labs(x = "Sleep Disorder", y = "Sleep Duration") +
theme_minimal()
ggplotly(chart_1)')
  output$plot1 <- renderPlotly({
    ggplotly(chart_1)
  })
  output$code11 <- show_code('chart_2 <- df_sleep_clean %>%
ggplot(mapping = aes(x = Sleep.Disorder,
y = Daily.Steps)) +
geom_boxplot() +
labs(x = "Sleep Disorder", y = "Daily Steps") +
theme_minimal()
ggplotly(chart_2)')
  output$plot2 <- renderPlotly({
    ggplotly(chart_2)
  })
  output$code12 <- show_code('chart_3 <- df_sleep_clean %>%
ggplot(mapping = aes(x = Sleep.Disorder,
y = Heart.Rate)) +
geom_boxplot() +
labs(x = "Sleep Disorder", y = "Heart Rate") +
theme_minimal()
ggplotly(chart_3)')
  output$plot3 <- renderPlotly({
    ggplotly(chart_3)
  })
  output$code13 <- show_code('chart_4 <- df_sleep_clean %>%
ggplot(mapping = aes(x = Sleep.Disorder,
y = Physical.Activity.Level)) +
geom_boxplot() +
labs(x = "Sleep Disorder", y = "Physical Activity Level") +
theme_minimal()
ggplotly(chart_4)')
  output$plot4 <- renderPlotly({
    ggplotly(chart_4)
  })
  output$code14 <- show_code('chart_5 <- df_sleep_clean %>%
ggplot(mapping = aes(x = Sleep.Disorder,
y = Stress.Level)) +
geom_boxplot() +
labs(x = "Sleep Disorder", y = "Stress Level") +
theme_minimal()
ggplotly(chart_5)')
  output$plot5 <- renderPlotly({
    ggplotly(chart_5)
  })
  output$code15 <- show_code('chart_6 <- df_sleep_clean %>%
ggplot(mapping = aes(x = Sleep.Disorder,
y = Quality.of.Sleep)) +
geom_boxplot() +
labs(x = "Sleep Disorder", y = "Quality of Sleep") +
theme_minimal()
ggplotly(chart_6)')
  output$plot6 <- renderPlotly({
    ggplotly(chart_6)
  })

  # ---- Composition graphs ----
  output$code17 <- show_code('vis_1 <- table(df_sleep_clean$Occupation, df_sleep_clean$Sleep.Disorder) %>%
as.data.frame(.) %>%
group_by(Var1) %>%
mutate(Total = sum(Freq),
Percentage = Freq * 100 / Total) %>%
ungroup(.) %>%
ggplot(., aes(x = Var1,
y = Freq,
fill = Var2,
text = paste("Occupation :", Var1,
"<br>Sleep Disorder :", Var2,
"<br>Percentage :", round(Percentage, 1), "%"))) +
geom_bar(position = "fill",
stat = "identity") +
scale_y_continuous(labels = scales::percent_format()) +
scale_fill_manual(values = c("#9BDF96", "#5D9D57", "#3C7237")) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 0,
vjust = 0.5,
hjust = 0.5),
plot.margin = margin(t = 20)) +
labs(title = "Percentage of Sleep Disorder per Occupation",
x = "Occupation",
y = "Percentage",
fill = "Sleep Disorder") +
scale_x_discrete(labels = function(x) str_wrap(x, width = 10))
ggplotly(vis_1, tooltip = "text")')
  output$vis1 <- renderPlotly({
    ggplotly(vis_1, tooltip = "text")
  })
  output$code18 <- show_code('vis_2 <- table(df_sleep_clean$Occupation, df_sleep_clean$BMI.Category) %>%
as.data.frame(.) %>%
group_by(Var1) %>%
mutate(Total = sum(Freq),
Percentage = Freq * 100 / Total) %>%
ungroup(.) %>%
ggplot(., aes(x = Var1,
y = Freq,
fill = Var2,
text = paste("Occupation :", Var1,
"<br>BMI Category :", Var2,
"<br>Percentage :", round(Percentage, 1), "%"))) +
geom_bar(position = "fill",
stat = "identity") +
scale_y_continuous(labels = scales::percent_format()) +
scale_fill_manual(values = c("#9BDF96", "#5D9D57", "#3C7237")) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 0,
vjust = 0.5,
hjust = 0.5),
plot.margin = margin(t = 20)) +
labs(title = "Percentage of BMI Category per Occupation",
x = "Occupation",
y = "Percentage",
fill = "BMI Category")+
scale_x_discrete(labels = function(x) str_wrap(x, width = 10))
ggplotly(vis_2, tooltip = "text")')
  output$vis2 <- renderPlotly({
    ggplotly(vis_2, tooltip = "text")
  })
  output$code19 <- show_code('vis_3 <- table(df_sleep_clean$BMI.Category, df_sleep_clean$Sleep.Disorder) %>%
as.data.frame(.) %>%
group_by(Var1) %>%
mutate(Total = sum(Freq),
Percentage = Freq * 100 / Total) %>%
ungroup(.) %>%
ggplot(., aes(x = Var1,
y = Freq,
fill = Var2,
text = paste("BMI Category :", Var1,
"<br>Sleep Disorder :", Var2,
"<br>Percentage :", round(Percentage, 1), "%"))) +
geom_bar(position = "fill",
stat = "identity") +
scale_y_continuous(labels = scales::percent_format()) +
scale_fill_manual(values = c("#9BDF96", "#5D9D57", "#3C7237")) +
theme_minimal() +
labs(title = "Percentage of Sleep Disorder per BMI Category",
x = "BMI Category",
y = "Percentage",
fill = "Sleep Disorder")
ggplotly(vis_3, tooltip = "text")')
  output$vis3 <- renderPlotly({
    ggplotly(vis_3, tooltip = "text")
  })

  # ---- Distribution graphs ----
  output$code20 <- show_code('vis_4 <- df_sleep_clean %>%
ggplot(., aes(x = BMI.Category,
y = Quality.of.Sleep,
fill = BMI.Category)) +
geom_boxplot(width = 5) +
coord_flip() +
scale_fill_manual(values = c("#9BDF96", "#5D9D57", "#3C7237")) +
theme_minimal() +
labs(title = "Quality of Sleep based on BMI Category",
x = "BMI Category",
y = "Quality of Sleep",
fill = "BMI Category") +
expand_limits(x = 0, y = 0)
ggplotly(vis_4)')
  output$vis4 <- renderPlotly({
    ggplotly(vis_4)
  })
  output$code21 <- show_code('vis_5 <- df_sleep_clean %>%
ggplot(., aes(x = BMI.Category,
y = Sleep.Duration,
fill = BMI.Category)) +
geom_boxplot(width = 5) +
coord_flip() +
scale_fill_manual(values = c("#9BDF96", "#5D9D57", "#3C7237")) +
theme_minimal() +
labs(title = "Sleep Duration based on BMI Category",
x = "BMI Category",
y = "Sleep Duration",
fill = "BMI Category") +
expand_limits(x = 0, y = 0)
ggplotly(vis_5)')
  output$vis5 <- renderPlotly({
    ggplotly(vis_5)
  })
  output$code22 <- show_code('vis_6 <- df_sleep_clean %>%
ggplot(., aes(x = Sleep.Disorder,
y = Age,
fill = Sleep.Disorder)) +
geom_boxplot(width = 5) +
coord_flip() +
scale_fill_manual(values = c("#9BDF96", "#5D9D57", "#3C7237")) +
theme_minimal() +
labs(title = "Age Distribution based on Sleep Disorder",
x = "Sleep Disorder",
y = "Age",
fill = "Sleep Disorder") +
expand_limits(x = 0, y = 0)
ggplotly(vis_6)')
  output$vis6 <- renderPlotly({
    ggplotly(vis_6)
  })

  # ---- Correlation heatmap ----
  # The regex is written with four backslashes so that the listing displays the
  # valid R literal "\\." (two backslashes would display the invalid "\.").
  output$code24 <- show_code('heatmap <- df_sleep_clean %>%
select_if(is.numeric) %>%
cor() %>%
melt() %>%
ggplot(aes(x = Var1,
y = Var2,
fill = value,
label = round(value, 2),
text = paste("Variable 1 :", Var1,
"<br>Variable 2 :", Var2,
"<br>Correlation Coefficient :", round(value, 2)))) +
geom_tile(color = "white") +
geom_text(color = "black") +
scale_fill_gradient2(low = "#FF751A",
mid = "white",
high = "#3C7237",
midpoint = 0,
limit = c(-1, 1),
space = "Lab",
name = "Correlation") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 0,
vjust = 0.5,
hjust = 0.5),
axis.text.y = element_text(hjust = 1)) +
labs(title = "Correlation Heatmap",
x = "",
y = "") +
scale_x_discrete(labels = function(x) str_wrap(str_replace_all(x, "\\\\.", " "), width = 10)) +
scale_y_discrete(labels = function(x) str_wrap(str_replace_all(x, "\\\\.", " "), width = 10))
ggplotly(heatmap, tooltip = "text")')
  output$heatmap <- renderPlotly({
    ggplotly(heatmap, tooltip = "text")
  })

  # ---- Linear regression tab ----
  # Snapshot of the two selectors, refreshed only when Submit is pressed, so the
  # plot and the model summary always describe the same pair of variables.
  regression_vars <- eventReactive(input$submitbutton, {
    list(x = input$variable1, y = input$variable2)
  })

  output$visualization <- renderPlotly({
    vars <- regression_vars()
    # !!sym() turns the selected column name into a column reference; it
    # replaces the deprecated aes_string().
    reg <- ggplot(
      df_sleep_clean,
      aes(x = !!sym(vars$x), y = !!sym(vars$y))
    ) +
      geom_jitter(color = "#5D9D57") +
      geom_smooth(method = "lm", se = FALSE, color = "red") +
      labs(
        title = paste("Linear Regression", vars$x, "vs", vars$y),
        x = vars$x,
        y = vars$y
      )
    ggplotly(reg)
  })

  output$summary <- renderPrint({
    vars <- regression_vars()
    validate(need(
      vars$x != vars$y,
      "Select two different variables to fit a regression model."
    ))
    # Fit Y ~ X, the same model geom_smooth() draws in the plot above.
    # (The summary used to fit X ~ Y and therefore contradicted the plot.)
    model_formula <- reformulate(vars$x, response = vars$y)
    model <- lm(model_formula, data = df_sleep_clean)
    # Store the evaluated formula so the printed "Call:" shows the variables.
    model$call$formula <- model_formula
    summary(model)
  })

  # ---- Hypothesis testing tab ----
  # Snapshot of the two selectors, refreshed only when Submit is pressed.
  hypothesis_vars <- eventReactive(input$submitbutton2, {
    list(dependent = input$variable1h, independent = input$variable2h)
  })

  output$htest <- renderPrint({
    vars <- hypothesis_vars()
    # Regressing a variable on itself drops the predictor from the model, so
    # there would be no coefficient row to read the p-value from.
    validate(need(
      vars$dependent != vars$independent,
      "Select two different variables to run the test."
    ))
    regression <- lm(
      reformulate(vars$independent, response = vars$dependent),
      data = df_sleep_clean
    )
    # p-value of the slope of the independent variable.
    p_value <- summary(regression)$coefficients[vars$independent, "Pr(>|t|)"]
    print(p_value)
    if (p_value < 0.05) {
      print(paste(vars$independent, "affects", vars$dependent))
    } else {
      print(paste(vars$independent, "does not affect", vars$dependent))
    }
  })
}

shinyApp(
  ui = ui,
  server = server
)
shinyApp(ui = ui,
server = server)